# Use R to produce information similar to that
# shown on the web page generated at
# http://courses.wccnet.edu/~palay/math160r/setuplookatsamples.htm

# First, look at a population that is
# normally distributed.

# The gnrnd5 function generates the initial
# population of 8000 items. Using it gives us
# control over the population and lets us easily
# regenerate the same population at different
# times and places.
source("../gnrnd5.R")

# Note, change the 34343 (digits 2 through 6 of
# the first argument) to generate different values
gnrnd5(434343799904, 1000000000)

# L1 is not assigned here, so it is presumably
# created by the gnrnd5() call above.
source("../pop_sd.R")
sigma <- pop_sd(L1)
mu <- mean(L1)

# now decide on the desired mean and standard
# deviation
desired_mean <- 142  # change this if you want
desired_sd <- 13.6   # change this if you want

# standardize L1 with its own mean and standard
# deviation, then stretch it to desired_sd and
# shift it to desired_mean
new_pop <- desired_mean + desired_sd * ((L1 - mu) / sigma)

# let us look at the new_pop
head(new_pop, n = 12)
tail(new_pop, n = 12)
summary(new_pop)
mean(new_pop)
pop_sd(new_pop)
hist(new_pop)
# a slightly better plot
hist(new_pop, breaks = 30, main = "Our underlying population")
boxplot(new_pop, horizontal = TRUE, main = "Our underlying population")

source("../assess_normality.R")
assess_normality(new_pop)

# now, we want to take 1000 samples, each of
# some specified size, and then look at the
# population of the sample means, and even of
# the sample standard deviations.
desired_sample_size <- 32  # change this if you want
number_of_samples <- 1000  # change this if you want

# Note that each time you perform the following
# lines you will get a different sequence of
# samples

# Preallocate the result vectors as doubles. The
# single number_of_samples value drives both the
# vector lengths and the loop, so they cannot get
# out of step with each other.
my_sample_means <- numeric(number_of_samples)
my_sample_sds <- numeric(number_of_samples)
for (i in seq_len(number_of_samples)) {
  # sample() draws without replacement by default
  this_sample <- sample(new_pop, desired_sample_size)
  my_sample_means[i] <- mean(this_sample)
  my_sample_sds[i] <- sd(this_sample)
}

# look at the sample means
head(my_sample_means, 12)
tail(my_sample_means, 12)
mean(my_sample_means)
pop_sd(my_sample_means)
# recall the predicted values
mean(new_pop)
pop_sd(new_pop) / sqrt(desired_sample_size)
# then, look at the mean of the sample
# standard deviations
mean(my_sample_sds)
# check out a few graphs
hist(my_sample_means, breaks = 30, main = "Histogram of sample means")
boxplot(my_sample_means, horizontal = TRUE, main = "Boxplot of sample means")
assess_normality(my_sample_means)

######################################
## everything is as we expect. Feel free to
## go back and get new samples: re-run the
## lines above, starting where my_sample_means
## is created.
## Or, feel free to change the sample size
## by changing desired_sample_size (or the
## number of samples by changing
## number_of_samples) and then running
## the lines after that.
## Or feel free to change the mean or
## standard deviation of the population by
## changing desired_mean and/or desired_sd
## and then running the lines after that.
## Or feel free to change the original
## distribution by changing the 5 digit
## seed value pointed out in the note above
## the first gnrnd5() call.
######################################

###################################
## Now let us do the same thing, but this time
## we will start with what is essentially a
## uniform distribution.
# Note, change the 34343 (digits 2 through 6 of
# the first argument) to generate different values
gnrnd5(434343799901, 9678394327)

# L1 is not assigned here, so it is presumably
# re-created by the gnrnd5() call above.
sigma <- pop_sd(L1)
mu <- mean(L1)

# now decide on the desired mean and standard
# deviation
desired_mean <- 142  # change this if you want
desired_sd <- 13.6   # change this if you want

# standardize L1, then rescale it to the desired
# standard deviation and mean
new_pop <- (L1 - mu) / sigma * desired_sd + desired_mean

# let us look at the new_pop
head(new_pop, 12)
tail(new_pop, 12)
summary(new_pop)
mean(new_pop)
pop_sd(new_pop)
hist(new_pop)
# a slightly better plot
hist(new_pop, breaks = 30, main = "Our underlying population")
boxplot(new_pop, horizontal = TRUE, main = "Our underlying population")
assess_normality(new_pop)

# now, we want to take many samples, each of
# some specified size, and then look at the
# population of the sample means, and even of
# the sample standard deviations.
desired_sample_size <- 32  # change this if you want
number_of_samples <- 1000  # change this if you want

# Note that each time you perform the following
# lines you will get a different sequence of
# samples

# Preallocate the result vectors as doubles. The
# single number_of_samples value drives both the
# vector lengths and the loop, so they cannot get
# out of step with each other.
my_sample_means <- numeric(number_of_samples)
my_sample_sds <- numeric(number_of_samples)
for (i in seq_len(number_of_samples)) {
  # sample() draws without replacement by default
  this_sample <- sample(new_pop, desired_sample_size)
  my_sample_means[i] <- mean(this_sample)
  my_sample_sds[i] <- sd(this_sample)
}

# look at the sample means
head(my_sample_means, 12)
tail(my_sample_means, 12)
mean(my_sample_means)
pop_sd(my_sample_means)
# recall the predicted values
mean(new_pop)
pop_sd(new_pop) / sqrt(desired_sample_size)
# then, look at the mean of the sample
# standard deviations
mean(my_sample_sds)
# check out a few graphs
hist(my_sample_means, breaks = 30, main = "Histogram of sample means")
boxplot(my_sample_means, horizontal = TRUE, main = "Boxplot of sample means")
assess_normality(my_sample_means)

######################################
## everything is as we expect. Feel free to
## go back and get new samples: re-run the
## lines above, starting where my_sample_means
## is created.
## Or, feel free to change the sample size
## by changing desired_sample_size (or the
## number of samples by changing
## number_of_samples) and then running
## the lines after that.
## Or feel free to change the mean or
## standard deviation of the population by
## changing desired_mean and/or desired_sd in
## the uniform-distribution section above and
## then running the lines after that.
## Or feel free to change the original
## distribution by changing the 5 digit
## seed value pointed out in the note above
## that section's gnrnd5() call.
######################################

###################################
## Now let us do the same thing, but this time
## we will start with what is essentially a
## skewed right distribution.

# Note, change the 34343 (digits 2 through 6 of
# the first argument) to generate different values
gnrnd5(434343799902, 9678394327)

# L1 is not assigned here, so it is presumably
# re-created by the gnrnd5() call above.
sigma <- pop_sd(L1)
mu <- mean(L1)

# now decide on the desired mean and standard
# deviation
desired_mean <- 142  # change this if you want
desired_sd <- 13.6   # change this if you want

# standardize L1 with its own mean and standard
# deviation, then stretch it to desired_sd and
# shift it to desired_mean
new_pop <- desired_mean + desired_sd * ((L1 - mu) / sigma)

# let us look at the new_pop
head(new_pop, n = 12)
tail(new_pop, n = 12)
summary(new_pop)
mean(new_pop)
pop_sd(new_pop)
hist(new_pop)
# a slightly better plot
hist(new_pop, breaks = 30, main = "Our underlying population")
boxplot(new_pop, horizontal = TRUE, main = "Our underlying population")
assess_normality(new_pop)

# now, we want to take 1000 samples, each of
# some specified size, and then look at the
# population of the sample means, and even of
# the sample standard deviations.
desired_sample_size <- 32  # change this if you want
number_of_samples <- 1000  # change this if you want

# Note that each time you perform the following
# lines you will get a different sequence of
# samples

# Preallocate the result vectors as doubles. The
# single number_of_samples value drives both the
# vector lengths and the loop, so they cannot get
# out of step with each other.
my_sample_means <- numeric(number_of_samples)
my_sample_sds <- numeric(number_of_samples)
for (i in seq_len(number_of_samples)) {
  # sample() draws without replacement by default
  this_sample <- sample(new_pop, desired_sample_size)
  my_sample_means[i] <- mean(this_sample)
  my_sample_sds[i] <- sd(this_sample)
}

# look at the sample means
head(my_sample_means, 12)
tail(my_sample_means, 12)
mean(my_sample_means)
pop_sd(my_sample_means)
# recall the predicted values
mean(new_pop)
pop_sd(new_pop) / sqrt(desired_sample_size)
# then, look at the mean of the sample
# standard deviations
mean(my_sample_sds)
# check out a few graphs
hist(my_sample_means, breaks = 30, main = "Histogram of sample means")
boxplot(my_sample_means, horizontal = TRUE, main = "Boxplot of sample means")
assess_normality(my_sample_means)

######################################
## everything is as we expect. Feel free to
## go back and get new samples: re-run the
## lines above, starting where my_sample_means
## is created.
## Or, feel free to change the sample size
## by changing desired_sample_size (or the
## number of samples by changing
## number_of_samples) and then running
## the lines after that.
## Or feel free to change the mean or
## standard deviation of the population by
## changing desired_mean and/or desired_sd in
## the skewed-right section above and then
## running the lines after that.
## Or feel free to change the original
## distribution by changing the 5 digit
## seed value pointed out in the note above
## that section's gnrnd5() call.
######################################